# Goal: Regression analyses in the SM (Tables A6 & A7)
# by Jennifer Pan and Yiqing Xu


################################################
## Preferences and Regime Support
################################################


# Table A7 — coefficients with SE on the next row; export to .xlsx with row names

library(fixest)
library(writexl)
library(haven)
d <- read_dta("data/sample1.dta")


# variable lists
ideology <- c("index_poli","index_econ","index_nati","index_trad","index_equi","index_ethn")
covars   <- c("female","age","age_sq","hschool","college","urbanhukou","ccp","han","married",
              "inc_low","inc_high","class_percep","religious","knowledge")
outcomes <- c("idx_support", grep("^support", names(d), value = TRUE),
              "trust_center","trust_local")

row_lbl <- c(index_poli="Politically liberal",
             index_econ="Pro-market",
             index_nati="Nationalistic",
             index_trad="Traditionalist",
             index_equi="Pro-equality",
             index_ethn="Minority accommodating")

col_lbl <- c("(1) Additive Index","(2) Solve problems","(3) Be proud",
             "(4) Should support","(5) Prefer live under","(6) Central trust","(7) Municipal trust")

# run regressions
fits <- lapply(outcomes, function(y)
  feols(as.formula(paste(y, "~", paste(c(ideology, covars), collapse = " + "), "| prov")),
        data = d, vcov = "hetero")
)
# 3) Build 2-rows-per-ideology matrix
P <- length(ideology); K <- length(fits)
coef_mat <- matrix("", nrow = 2*P, ncol = K)

fill_col <- function(m){
  ct <- coeftable(m)
  est <- setNames(rep(NA_real_, P), ideology)
  se  <- est
  common <- intersect(rownames(ct), ideology)
  est[common] <- ct[common, "Estimate"]
  se [common] <- ct[common, "Std. Error"]
  out <- character(2*P)
  out[seq(1, by = 2, length.out = P)] <- sprintf("%.3f", est)
  out[seq(2, by = 2, length.out = P)] <- sprintf("(%.3f)", se)
  out
}
for(k in seq_len(K)) coef_mat[, k] <- fill_col(fits[[k]])

# row/col names
rownames(coef_mat) <- as.vector(rbind(row_lbl[ideology], rep("", P)))
colnames(coef_mat) <- col_lbl[seq_len(K)]

# 4) Footer rows (mean from estimation sample; N; R2)
means <- sapply(fits, function(m) mean(as.numeric(fitted(m) + resid(m))))
Ns    <- sapply(fits, nobs)
R2s   <- sapply(fits, function(m) r2(m, "r2"))

foot <- rbind("Outcome variable mean" = sprintf("%.3f", means),
              "Observations"          = as.character(Ns),
              "R-squared"             = sprintf("%.3f", R2s))
colnames(foot) <- col_lbl[seq_len(K)]

# 5) Combine and write .xlsx with row names kept
out <- rbind(coef_mat, foot)
out <- data.frame(Variable = rownames(out), out, check.names = FALSE)
write_xlsx(out, path = "tables/regime_support.xlsx")



################################################
## Determinants of Policy Preferences
################################################

library(haven)
library(dplyr)
library(fixest)

# 1) Load, append, transform
dims   <- c("poli","econ","nati","trad","equi","ethn")
covars <- c("female","age","college","knowledge","urbanhukou","ccp",
            "married","working","income","class_percep","english")

s <- read_dta("data/sample2.dta") |> select(starts_with("index_"), all_of(covars), prov)
q <- read_dta("data/sample1.dta") |> select(starts_with("index_"), all_of(covars), prov)
dat <- bind_rows(q, s) |> mutate(age = age/10)

# 2) Labels
row_lbl <- c(female="Female", age="Age/10", college="College or above",
             knowledge="Political knowledge", urbanhukou="Urban Hukou",
             ccp="CCP member", married="Married", working="Having worked",
             income="Income bracket", class_percep="Perceived social class",
             english="English proficiency")
col_lbl <- c("(1) Politically liberal","(2) Pro-market","(3) Nationalistic",
             "(4) Traditionalistic","(5) Pro-equality","(6) Minority accommodating")

# 3) Run regressions
fits <- lapply(dims, function(dim)
  feols(as.formula(paste0("index_", dim, " ~ ",
                          paste(covars, collapse = " + "), " | prov")),
        data = dat, vcov = "hetero")
)

# 4) Build matrix: 2 rows per covariate; columns = outcomes
P <- length(covars); K <- length(fits)
coef_mat <- matrix("", nrow = 2*P, ncol = K)
colnames(coef_mat) <- col_lbl
rownames(coef_mat) <- as.vector(rbind(row_lbl[covars], rep("", P)))


fill_col <- function(m){
  ct <- coeftable(m)
  est <- setNames(rep(NA_real_, P), covars)
  se  <- est
  common <- intersect(rownames(ct), covars)
  est[common] <- ct[common, "Estimate"]
  se [common] <- ct[common, "Std. Error"]
  out <- character(2*P)
  out[seq(1, by = 2, length.out = P)] <- sprintf("%.3f", est)
  out[seq(2, by = 2, length.out = P)] <- sprintf("(%.3f)", se)
  out
}

for(k in seq_len(K)) coef_mat[, k] <- fill_col(fits[[k]])

# Row/column names

# 5) Footer rows
means <- sapply(fits, function(m) mean(as.numeric(fitted(m) + resid(m))))
Ns    <- sapply(fits, nobs)
R2s   <- sapply(fits, function(m) r2(m, "r2"))

foot <- rbind("Sample"                = rep("Pooled", K),
              "Outcome variable mean" = sprintf("%.3f", means),
              "Observations"          = as.character(Ns),
              "R-squared"             = sprintf("%.3f", R2s))
colnames(foot) <- col_lbl
out <- rbind(coef_mat, foot)

# 6) Combine & export (quote to preserve parentheses/newlines)
library(writexl)
out.df <- cbind(RowName = rownames(out), as.data.frame(out))
write_xlsx(as.data.frame(out.df), path = "tables/determinants.xlsx")


